# Record the R version and attached packages for reproducibility.
sessionInfo()
The appropriate Box-Cox transformation is found with BoxCox.lambda(), which selects the optimal transformation parameter $\lambda$ for each series.
# Forecasting datasets and helpers; usnetelec, usgdp, mcopper and
# enplanements all ship with fpp2.
library(fpp2)
#help(usnetelec)
#help(usgdp)
#help(mcopper)
#help(enplanements)

# For each series: estimate the optimal Box-Cox parameter, plot the
# transformed series, and report the lambda that was used.
for (series in list(usnetelec, usgdp, mcopper, enplanements)) {
  lambda <- BoxCox.lambda(series)
  print(autoplot(BoxCox(series, lambda)))
  print(lambda)
}
# Open the documentation page for the cangas dataset.
help(cangas)
Monthly Canadian gas production, billions of cubic metres, January 1960 - February 2005.
# Estimate the Box-Cox parameter for cangas, then compare the raw and
# transformed series; finally print the lambda and the data themselves.
lambda <- BoxCox.lambda(cangas)
print(autoplot(cangas))
print(autoplot(BoxCox(cangas, lambda)))
print(lambda)
print(cangas)
Why is a Box-Cox transformation unhelpful for the cangas data?
From the graphs, we see that the autoplot() output barely changes when the best $\lambda$ is chosen for the Box-Cox transformation. The Box-Cox transformation can only stabilize variation that changes systematically with the level of the series; in cangas the variability changes over time in a way unrelated to the level, so no choice of $\lambda$ helps.
If $\lambda = 1$, then $w_t = y_t - 1$, so the transformed data is shifted downwards but the shape of the time series is unchanged. For all other values of $\lambda$, the time series changes shape.
# Box-Cox transform of the dole series, shown next to the original.
lambda <- BoxCox.lambda(dole)
print(autoplot(BoxCox(dole, lambda)))
print(autoplot(dole))
dole: the Box-Cox transformation selects $\lambda = 0.3291$ and yields a visualization whose seasonal pattern is clearer and whose variation is more uniform.
# usdeaths: estimate the Box-Cox parameter and plot the transformed series.
lambda <- BoxCox.lambda(usdeaths)
print(autoplot(BoxCox(usdeaths, lambda)))
usdeaths: essentially the same as the untransformed series; the Box-Cox transformation makes little difference here.
# bricksq: estimate the Box-Cox parameter and plot the transformed series.
lambda <- BoxCox.lambda(bricksq)
print(autoplot(BoxCox(bricksq, lambda)))
bricksq: essentially unchanged by the transformation. Because the variability of the original series does not grow with its level, the $(y_t^\lambda - 1)/\lambda$ transformation has little effect.
# Quarterly Australian beer production from 1992 onward, forecast with
# the seasonal naive method; then inspect the residual diagnostics.
beer <- window(ausbeer, start = 1992)
beer_fc <- snaive(beer)
print(autoplot(beer_fc))
beer_res <- residuals(beer_fc)
print(autoplot(beer_res))
checkresiduals(beer_fc)
I conclude that the seasonal naïve forecast may not be an appropriate method for the quarterly Australian beer production data because the residuals are neither normally distributed nor white noise. First, the residuals hover around 0 but tend to lie below the x-axis. Second, the ACF shows a significant spike at lag 4, and the residual histogram does not look normally distributed.
# Monthly retail data: read the Excel sheet (skip = 1 drops the extra
# header row) and build a monthly time series starting April 1982.
retaildata <- readxl::read_excel(
  "/Users/apple/Desktop/bc_f19_econ/Forecasting/data/retail.xlsx",
  skip = 1
)
myts <- ts(retaildata[, "A3349873A"], frequency = 12, start = c(1982, 4))

# Hold out everything from 2011 onward as the test set.
myts.train <- window(myts, end = c(2010, 12))
myts.test <- window(myts, start = 2011)
print(
  autoplot(myts) +
    autolayer(myts.train, series = "Training") +
    autolayer(myts.test, series = "Test")
)

# Seasonal naive forecasts from the training window, scored against the
# held-out test set; residual diagnostics follow.
fc <- snaive(myts.train)
print(accuracy(fc, myts.test))
checkresiduals(fc)
# US monthly sales of new one-family houses (hsales): check whether a
# Box-Cox transformation helps, split into train/test windows, and
# compare the simple benchmark forecasting methods.
print(hsales)
print(autoplot(hsales))
lambda <- BoxCox.lambda(hsales)
print(autoplot(BoxCox(hsales, lambda)))
print(lambda)
# The transformation barely changes the shape, so work with the raw series.

# Train on data through 1993; hold out 1994 onward for evaluation.
hsales.train <- window(hsales, end = c(1993, 12))
hsales.test <- window(hsales, start = 1994)
print(
  autoplot(hsales) +
    autolayer(hsales.train, series = "Train") +
    autolayer(hsales.test, series = "Test")
)

# Benchmark forecasts from the training window.
fc1 <- meanf(hsales.train)
fc2 <- naive(hsales.train)
fc3 <- rwf(hsales.train)   # rwf() without drift should be equivalent to naive()
fc4 <- snaive(hsales.train)
fc5 <- rwf(hsales.train, drift = TRUE)
# identical() compares every attribute (e.g. the stored call), so it can
# be FALSE even when all.equal() reports the forecasts agree — confirm.
print(identical(fc2, fc3))
print(all.equal(fc2, fc3))

# Overlay 50-step-ahead forecasts from each method on the full series.
# Note: snaive() is the *seasonal* naive method, so label it accordingly.
print(
  autoplot(hsales) +
    autolayer(meanf(hsales.train, h = 50),
              series = "Mean", PI = FALSE) +
    autolayer(naive(hsales.train, h = 50),
              series = "Naïve", PI = FALSE) +
    autolayer(rwf(hsales.train, drift = TRUE, h = 50),
              series = "Drift", PI = FALSE) +
    autolayer(snaive(hsales.train, h = 50),
              series = "Seasonal Naïve", PI = FALSE) +
    ggtitle("New One-Family House in the US") +
    xlab("Year") + ylab("Sales $") +
    guides(color = guide_legend(title = "Forecast Train"))
)
From the comparison graph, the seasonal naïve method appears to predict the test set best. Therefore we choose snaive() and keep fc4 as the final model.
After checking the residuals of the seasonal naïve method, we find the residual histogram is roughly normal overall; however, many autocorrelations in the ACF chart exceed the upper and lower significance bounds, so the residuals are not white noise. In addition, the residuals scatter around 0 but tend to lie below the x-axis. Overall, none of the prediction methods we have learned so far may be a proper way to predict the house sales data.
# Residual diagnostics for the chosen seasonal naive model (fc4).
checkresiduals(fc4)
# (Re)attach plotting and forecasting packages for the next exercises.
library(ggplot2)
#install.packages('fpp2')
library(fpp2)
# Documentation for the daily gold price series.
?gold
Daily morning gold prices in US dollars. 1 January 1985 – 31 March 1989.
# Documentation for the quarterly woollen yarn production series.
?woolyrnq
Quarterly production of woollen yarn in Australia: tonnes. Mar 1965 – Sep 1994.
# Documentation for the Australian monthly gas production series.
?gas
Australian monthly gas production: 1956–1995.
# Read the tutorial data and convert all columns except the first
# (the quarter labels) into a quarterly time series starting in 1981.
tute1 <- read.csv("/Users/apple/Desktop/bc_f19_econ/Forecasting/data/tute1.csv", header = TRUE)
mytimeseries <- ts(tute1[, -1], start = 1981, frequency = 4)

# Plot each series in its own facet, then all on one set of axes.
print(autoplot(mytimeseries, facets = TRUE))
print(autoplot(mytimeseries))

# Scatter of Sales against GDP; the second layer redraws the same
# points larger and in red on top of the first.
print(
  ggplot() +
    geom_point(data = tute1, aes(GDP, Sales)) +
    geom_point(data = tute1, aes(GDP, Sales), colour = "red", size = 3)
)
plot(mytimeseries)
# a. Read the retail spreadsheet; skip = 1 because the sheet carries an
#    extra header row before the column names.
retaildata <- readxl::read_excel("/Users/apple/Desktop/bc_f19_econ/Forecasting/data/retail.xlsx", skip = 1)
samplets <- ts(retaildata[, "A3349873A"],
               frequency = 12, start = c(1982, 4))

# b. Pick a different series and build its monthly ts object.
myts <- ts(retaildata[, "A3349588R"],
           frequency = 12, start = c(1982, 4))

# Time plot of the sample series and a season plot of the chosen one.
print(autoplot(samplets))
print(ggseasonplot(myts))
# Browse documentation and time plots for several example series.
help(bicoal)
print(autoplot(bicoal))
help(chicken)
print(autoplot(chicken))
help(dole)
print(autoplot(dole))
help(usdeaths)
print(autoplot(usdeaths))
help(lynx)
print(autoplot(lynx))

# Daily closing prices of Google stock, with title and axis labels.
help(goog)
print(
  autoplot(goog) +
    ggtitle("Google Stock Prices") +
    xlab("Time") + ylab("Daily Prices $")
)